# Remove every non-hidden object from the current environment before running.
# NOTE(review): when this file is source()d from another script or an active
# session, this also deletes the caller's objects; running the script in a
# fresh R session would make the call unnecessary -- confirm before relying
# on it.
rm(list = ls())

library(rvest)
library(stringr)
library(lubridate)
library(tidyverse)
library(here)
library(ggplot2)
library(dplyr)
library(ggthemes)
library(cowplot)

source("functions.R")


# Share by country and language -------------------------------------------


## French-speaking ------------------------------------------------------------
# Builds, for France and Switzerland, the yearly ratio of the document counts
# in the Stress*.csv exports to the document counts in the Migrant*.csv
# exports, then plots both series and saves the figure as a PDF.

## Density Stress
# Each export is read with its first three lines skipped. Only rows whose Date
# field contains "Start Date" are kept, and Date is reduced to the first run of
# four digits it contains (the year), kept as a character string.
StressFR <- read.csv(here("data", "Factiva", "StressFR.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))

StressCH_FR <- read.csv(
  here("data", "Factiva", "StressCH_FR.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))


## Imm
# Same cleaning as above, applied to the exports used as the denominator.
ImmFR <- read.csv(here("data", "Factiva", "MigrantFR.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))

ImmCH_FR <- read.csv(
  here("data", "Factiva", "MigrantCH_FR.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))


# Stack both countries, then attach the denominator counts. The join keys are
# spelled out so the match cannot silently change if the exports ever share
# another column name (and so dplyr does not have to guess and print them).
Plot_FR <- bind_rows(
  StressFR %>% mutate(Country = "France"),
  StressCH_FR %>% mutate(Country = "Switzerland")
) %>%
  rename(Stress = Document.Count) %>%
  left_join(
    bind_rows(
      ImmFR %>% mutate(Country = "France"),
      ImmCH_FR %>% mutate(Country = "Switzerland")
    ),
    by = c("Date", "Country")
  ) %>%
  mutate(`Stress Rate` = as.numeric(Stress) / as.numeric(Document.Count))

# First year for which both countries have data: the latest of the
# per-country earliest years.
start_date <- Plot_FR %>%
  group_by(Country) %>%
  summarise(start_date = min(as.numeric(Date))) %>%
  summarise(start_date = max(start_date)) %>%
  pull(start_date)

# Date is stored as character, so it is converted before being compared with
# the numeric start_date; comparing the raw column would fall back to string
# comparison.
# cbPalette is not defined in this section (presumably it comes from
# functions.R -- confirm); its third entry is dropped here.
Plot_FR_Out <- Plot_FR %>%
  filter(as.numeric(Date) >= start_date) %>%
  mutate(Country = factor(Country, levels = c("Switzerland", "France"))) %>%
  ggplot(aes(
    x = as.numeric(Date),
    y = `Stress Rate`,
    color = Country,
    group = Country,
    shape = Country,
    linetype = Country
  )) +
  geom_line() +
  geom_point() +
  labs(
    subtitle = "Coverage of Density Stress in French-Language Newspapers",
    x = "Year",
    y = "Share of total articles"
  ) +
  theme_bw() +
  scale_color_manual(values = cbPalette[-3], name = NULL) +
  scale_shape_manual(values = c(16, 17), name = NULL) +
  scale_linetype_manual(values = c("solid", "dashed"), name = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
ggsave(
  filename = here("figures", "Factiva_FR_Countries.pdf"),
  plot = Plot_FR_Out,
  height = 4,
  width = 5
)

## German-speaking ------------------------------------------------------------
# Builds, for Germany and Switzerland, the yearly ratio of the document counts
# in the Stress*.csv exports to the document counts in the Migrant*.csv
# exports, then plots both series and saves the figure as a PDF.

## Density Stress
# Each export is read with its first three lines skipped. Only rows whose Date
# field contains "Start Date" are kept, and Date is reduced to the first run of
# four digits it contains (the year), kept as a character string.
StressDE <- read.csv(here("data", "Factiva", "StressDE.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))

StressCH_DE <- read.csv(
  here("data", "Factiva", "StressCH_DE.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))


## Imm
# Same cleaning as above, applied to the exports used as the denominator.
ImmDE <- read.csv(here("data", "Factiva", "MigrantDE.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))

ImmCH_DE <- read.csv(
  here("data", "Factiva", "MigrantCH_DE.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))


# Stack both countries, then attach the denominator counts. The join keys are
# spelled out so the match cannot silently change if the exports ever share
# another column name (and so dplyr does not have to guess and print them).
Plot_DE <- bind_rows(
  StressDE %>% mutate(Country = "Germany"),
  StressCH_DE %>% mutate(Country = "Switzerland")
) %>%
  rename(Stress = Document.Count) %>%
  left_join(
    bind_rows(
      ImmDE %>% mutate(Country = "Germany"),
      ImmCH_DE %>% mutate(Country = "Switzerland")
    ),
    by = c("Date", "Country")
  ) %>%
  mutate(`Stress Rate` = as.numeric(Stress) / as.numeric(Document.Count))

# First year for which both countries have data: the latest of the
# per-country earliest years.
start_date <- Plot_DE %>%
  group_by(Country) %>%
  summarise(start_date = min(as.numeric(Date))) %>%
  summarise(start_date = max(start_date)) %>%
  pull(start_date)

# Date is stored as character, so it is converted before being compared with
# the numeric start_date; comparing the raw column would fall back to string
# comparison.
# cbPalette is not defined in this section (presumably it comes from
# functions.R -- confirm); its third entry is dropped here.
Plot_DE_Out <- Plot_DE %>%
  filter(as.numeric(Date) >= start_date) %>%
  mutate(Country = factor(Country, levels = c("Switzerland", "Germany"))) %>%
  ggplot(aes(
    x = as.numeric(Date),
    y = `Stress Rate`,
    color = Country,
    group = Country,
    shape = Country,
    linetype = Country
  )) +
  geom_line() +
  geom_point() +
  labs(
    subtitle = "Coverage of Density Stress in German-Language Newspapers",
    x = "Year",
    y = "Share of total articles"
  ) +
  theme_bw() +
  scale_color_manual(values = cbPalette[-3], name = NULL) +
  scale_shape_manual(values = c(16, 17), name = NULL) +
  scale_linetype_manual(values = c("solid", "dashed"), name = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
ggsave(
  filename = here("figures", "Factiva_DE_Countries.pdf"),
  plot = Plot_DE_Out,
  height = 4,
  width = 5
)


## Italian ------------------------------------------------------------
# Builds the yearly ratio of the document counts in StressIT.csv to the
# document counts in MigrantIT.csv, plots the single Italian series and saves
# the figure as a PDF.

## Density Stress
# The export is read with its first three lines skipped. Only rows whose Date
# field contains "Start Date" are kept, and Date is reduced to the first run of
# four digits it contains (the year), kept as a character string.
StressIT <- read.csv(here("data", "Factiva", "StressIT.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))


## Imm
# Same cleaning as above, applied to the export used as the denominator.
ImmIT <- read.csv(here("data", "Factiva", "MigrantIT.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date")) %>%
  mutate(Date = str_extract(Date, "\\d\\d\\d\\d"))

# Attach the denominator counts. The join keys are spelled out so the match
# cannot silently change if the exports ever share another column name (and so
# dplyr does not have to guess and print them).
Plot_IT <- StressIT %>%
  mutate(Country = "Italy") %>%
  rename(Stress = Document.Count) %>%
  left_join(
    ImmIT %>% mutate(Country = "Italy"),
    by = c("Date", "Country")
  ) %>%
  mutate(`Stress Rate` = as.numeric(Stress) / as.numeric(Document.Count))

# cbPalette is not defined in this section (presumably it comes from
# functions.R -- confirm); its first entry is dropped here.
Plot_IT_Out <- Plot_IT %>%
  ggplot(aes(
    x = as.numeric(Date),
    y = `Stress Rate`,
    color = Country,
    group = Country
  )) +
  geom_line() +
  geom_point() +
  labs(
    subtitle = "Coverage of Density Stress in Italian Newspapers",
    x = "Year",
    y = "Share of total articles"
  ) +
  theme_bw() +
  scale_color_manual(values = cbPalette[-1], name = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
ggsave(
  filename = here("figures", "Factiva_IT_Countries.pdf"),
  plot = Plot_IT_Out,
  height = 4,
  width = 5
)


# Political Parties ------------------------------------------------------------
## German speaking
# Builds, per year and topic, the ratio of the document counts in
# EconAllDE.csv / StressAllDE.csv to the document counts in MigrantAllDE.csv,
# then plots both topics and saves the figure as a PDF.

# Each export is read with its first three lines skipped, keeping only rows
# whose Date field contains "Start Date".
stressDE <- read.csv(here("data", "Factiva", "StressAllDE.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date"))

econDE <- read.csv(here("data", "Factiva", "EconAllDE.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date"))

migrantDE <- read.csv(
  here("data", "Factiva", "MigrantAllDE.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date"))


# Stack the two topics, convert Date (first run of four digits) and the counts
# to numeric, and attach the denominator by year. The join key is spelled out
# so the match cannot silently change if the exports ever share another column
# name.
# NOTE(review): years from 2024 onward are dropped -- presumably because the
# export covers only part of 2024; confirm.
DE_fig_df <- bind_rows(
  econDE %>% mutate(topic = "Economic"),
  stressDE %>% mutate(topic = "Overcrowding")
) %>%
  mutate(
    Date = as.numeric(str_extract(Date, "\\d\\d\\d\\d")),
    Document.Count = as.numeric(Document.Count)
  ) %>%
  filter(Date < 2024) %>%
  left_join(
    migrantDE %>%
      mutate(
        Date = as.numeric(str_extract(Date, "\\d\\d\\d\\d")),
        Denominator = as.numeric(Document.Count)
      ) %>%
      dplyr::select(-Document.Count),
    by = "Date"
  ) %>%
  mutate(Document.Share = Document.Count / Denominator) %>%
  # Impute 0 for years with no articles
  bind_rows(tibble(Date = 1994, topic = "Economic", Document.Share = 0))


# Date and Document.Share are already numeric at this point, so they are
# mapped directly. cbPalette is not defined in this section (presumably it
# comes from functions.R -- confirm); its third entry is dropped here.
DE_fig <- DE_fig_df %>%
  mutate(topic = factor(topic, levels = c("Overcrowding", "Economic"))) %>%
  ggplot(aes(
    x = Date,
    y = Document.Share,
    color = topic,
    group = topic,
    shape = topic,
    linetype = topic
  )) +
  geom_line() +
  geom_point() +
  labs(
    subtitle = "Articles in German\nEconomic and Overcrowding Concerns",
    x = "Year",
    y = "Share of total articles"
  ) +
  theme_bw() +
  scale_color_manual(values = cbPalette[-3], name = NULL) +
  scale_shape_manual(values = c(16, 17), name = NULL) +
  scale_linetype_manual(values = c("solid", "dashed"), name = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "bottom"
  )

ggsave(
  filename = here("figures", "Factiva_DE_Parties.pdf"),
  plot = DE_fig,
  height = 4,
  width = 4
)


## French speaking
# Builds, per year and topic, the ratio of the document counts in
# EconAllFR.csv / StressAllFR.csv to the document counts in MigrantAllFR.csv,
# then plots both topics and saves the figure as a PDF.

# Each export is read with its first three lines skipped, keeping only rows
# whose Date field contains "Start Date".
stressFR <- read.csv(here("data", "Factiva", "StressAllFR.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date"))

econFR <- read.csv(here("data", "Factiva", "EconAllFR.csv"), skip = 3) %>%
  filter(str_detect(Date, "Start Date"))

migrantFR <- read.csv(
  here("data", "Factiva", "MigrantAllFR.csv"),
  skip = 3
) %>%
  filter(str_detect(Date, "Start Date"))


# Stack the two topics, convert Date (first run of four digits) and the counts
# to numeric, and attach the denominator by year. The join key is spelled out
# so the match cannot silently change if the exports ever share another column
# name.
# NOTE(review): years from 2024 onward are dropped -- presumably because the
# export covers only part of 2024; confirm.
FR_fig_df <- bind_rows(
  econFR %>% mutate(topic = "Economic"),
  stressFR %>% mutate(topic = "Overcrowding")
) %>%
  mutate(
    Date = as.numeric(str_extract(Date, "\\d\\d\\d\\d")),
    Document.Count = as.numeric(Document.Count)
  ) %>%
  filter(Date < 2024) %>%
  left_join(
    migrantFR %>%
      mutate(
        Date = as.numeric(str_extract(Date, "\\d\\d\\d\\d")),
        Denominator = as.numeric(Document.Count)
      ) %>%
      dplyr::select(-Document.Count),
    by = "Date"
  ) %>%
  mutate(Document.Share = Document.Count / Denominator) %>%
  # Impute 0 for years during which there's no overcrowding
  bind_rows(
    tibble(Date = 1996:1998, topic = "Overcrowding", Document.Share = 0)
  )

# Date and Document.Share are already numeric at this point, so they are
# mapped directly. cbPalette is not defined in this section (presumably it
# comes from functions.R -- confirm); its third entry is dropped here.
FR_fig <- FR_fig_df %>%
  mutate(topic = factor(topic, levels = c("Overcrowding", "Economic"))) %>%
  ggplot(aes(
    x = Date,
    y = Document.Share,
    color = topic,
    group = topic,
    shape = topic,
    linetype = topic
  )) +
  geom_line() +
  geom_point() +
  labs(
    subtitle = "Articles in French\nEconomic and Overcrowding Concerns",
    x = "Year",
    y = "Share of total articles"
  ) +
  theme_bw() +
  scale_color_manual(values = cbPalette[-3], name = NULL) +
  scale_shape_manual(values = c(16, 17), name = NULL) +
  scale_linetype_manual(values = c("solid", "dashed"), name = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "bottom"
  )

ggsave(
  filename = here("figures", "Factiva_FR_Parties.pdf"),
  plot = FR_fig,
  height = 4,
  width = 4
)
